from src.bilibili_crawler import BilibiliDanmuCrawler
import pandas as pd

def main(*, keywords=None, total_videos=30, crawler=None):
    """Crawl Bilibili danmu for LLM-related keywords and print a summary.

    Prints a banner, asks the crawler for danmu matching ``keywords``,
    saves a non-empty result under the name ``bilibili_llm_danmu_test`` and
    prints basic statistics followed by up to five sample danmu.

    Exceptions raised while crawling, saving or summarising are caught and
    reported on stdout rather than propagated.

    Args:
        keywords: Search keywords handed to the crawler. Defaults to
            ``["大语言模型", "大模型", "LLM"]`` when ``None``.
        total_videos: Value forwarded to the crawler as ``total_videos``.
            Defaults to 30.
        crawler: Object providing ``crawl_danmu_by_keywords`` and
            ``save_data``. A new ``BilibiliDanmuCrawler`` is created when
            ``None``.
    """
    print("🎬 B站大语言模型弹幕分析爬虫")
    print("=" * 50)

    # Initialise the crawler unless the caller supplied one.
    if crawler is None:
        crawler = BilibiliDanmuCrawler()

    # Default search keywords.
    if keywords is None:
        keywords = ["大语言模型", "大模型", "LLM"]

    print(f"搜索关键词: {keywords}")
    # Report the same number that is actually passed to the crawler below.
    print(f"目标视频数: {total_videos}（测试用，可后续调整到360）")
    print("开始爬取...")

    try:
        danmu_df = crawler.crawl_danmu_by_keywords(
            keywords, total_videos=total_videos
        )

        # Treat a missing result the same as an empty one.
        if danmu_df is None or len(danmu_df) == 0:
            print("❌ 没有爬取到任何弹幕数据")
            return

        # Persist the data before printing anything derived from it.
        crawler.save_data(danmu_df, 'bilibili_llm_danmu_test')

        # Summary statistics.
        print("\n📊 爬取结果统计:")
        print(f"总弹幕数量: {len(danmu_df)}")
        print(f"涉及视频数量: {danmu_df['bvid'].nunique()}")
        print("关键词分布:")
        for keyword, count in danmu_df['keyword'].value_counts().items():
            print(f"  {keyword}: {count} 条弹幕")

        # Sample danmu. Number them by position: the DataFrame index labels
        # are not guaranteed to be 0-based integers, so they cannot be used
        # for the running number.
        print("\n📝 弹幕示例:")
        for position, text in enumerate(danmu_df['danmu'].head(5), start=1):
            print(f"  {position}. {text}")

    except Exception as e:
        # Top-level boundary of the script: report the failure and return.
        print(f"❌ 爬虫运行出错: {e}")
        print("请检查网络连接或稍后重试")

# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()